<!DOCTYPE html>
<html lang="zh">
<head>
  <meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1, maximum-scale=2">
<meta name="theme-color" content="#222">
<meta name="generator" content="Hexo 5.3.0">


  <link rel="apple-touch-icon" sizes="180x180" href="/yuwanzi.io/images/apple-touch-icon-next.png">
  <link rel="icon" type="image/png" sizes="32x32" href="/yuwanzi.io/images/favicon-32x32-next.png">
  <link rel="icon" type="image/png" sizes="16x16" href="/yuwanzi.io/images/favicon-16x16-next.png">
  <link rel="mask-icon" href="/yuwanzi.io/images/logo.svg" color="#222">

<link rel="stylesheet" href="/yuwanzi.io/css/main.css">



<!-- Third-party stylesheets (Font Awesome icons, animate.css) loaded over explicit HTTPS rather than protocol-relative URLs. -->
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/@fortawesome/fontawesome-free@5.15.1/css/all.min.css">
  <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/animate.css@3.1.1/animate.min.css">

<script class="hexo-configurations">
    // Reuse a global NexT object if one already exists on window; otherwise start from an empty object.
    var NexT = window.NexT || {};
    // Site-wide settings emitted as one object literal: host/root paths, scheme and version,
    // sidebar/bookmark/comments options, motion transitions, and the i18n strings used for search results.
    var CONFIG = {"hostname":"suyuhuan.gitee.io","root":"/yuwanzi.io/","images":"/yuwanzi.io/images","scheme":"Muse","version":"8.2.0","exturl":false,"sidebar":{"position":"left","display":"post","padding":18,"offset":12},"copycode":false,"bookmark":{"enable":false,"color":"#222","save":"auto"},"fancybox":false,"mediumzoom":false,"lazyload":false,"pangu":false,"comments":{"style":"tabs","active":null,"storage":true,"lazyload":false,"nav":null},"motion":{"enable":true,"async":false,"transition":{"post_block":"fadeIn","post_header":"fadeInDown","post_body":"fadeInDown","coll_header":"fadeInLeft","sidebar":"fadeInUp"}},"prism":false,"i18n":{"placeholder":"Suche...","empty":"We didn't find any results for the search: ${query}","hits_time":"${hits} results found in ${time} ms","hits":"${hits} results found"}};
  </script>
<meta name="description" content="什么是编码? 对于普通人来说,编码总是与一些秘密的东西相关联(加密与解密);对于程序员们来说,编码大多数是指一种用来在机器与人之间传递信息的方式. 但从广义上来讲,编码是从一种信息格式转换为另一种信息格式的过程,解码则是编码的逆向过程.接下来举几个使用到编码的例子:   当我们要把想表达的意思通过一种语言表达出来,其实就是在脑海中对信息进行了一次编码,而对方如果也懂得这门语言,那么就可以用这门语言">
<meta property="og:type" content="article">
<meta property="og:title" content="编码的那点事儿">
<meta property="og:url" content="https://suyuhuan.gitee.io/yuwanzi.io/2017/08/20/2017-08-20-Encode/index.html">
<meta property="og:site_name" content="玉丸子 | Blog">
<meta property="og:description" content="什么是编码? 对于普通人来说,编码总是与一些秘密的东西相关联(加密与解密);对于程序员们来说,编码大多数是指一种用来在机器与人之间传递信息的方式. 但从广义上来讲,编码是从一种信息格式转换为另一种信息格式的过程,解码则是编码的逆向过程.接下来举几个使用到编码的例子:   当我们要把想表达的意思通过一种语言表达出来,其实就是在脑海中对信息进行了一次编码,而对方如果也懂得这门语言,那么就可以用这门语言">
<meta property="og:locale" content="zh_CN">
<meta property="og:image" content="http://wx4.sinaimg.cn/large/63503acbly1fith5ayrgdj20n50ct0tj.jpg">
<meta property="og:image" content="http://wx1.sinaimg.cn/large/63503acbly1fith5be3u3j20o80bcgmk.jpg">
<meta property="og:image" content="http://wx1.sinaimg.cn/large/63503acbly1fith5bqcprj20r00bcjsi.jpg">
<meta property="og:image" content="http://wx4.sinaimg.cn/large/63503acbly1fith5c3ib8j20qg0ck3zq.jpg">
<meta property="og:image" content="http://wx2.sinaimg.cn/large/63503acbly1fith5cmmpbj20w008ot9i.jpg">
<meta property="article:published_time" content="2017-08-20T04:00:00.000Z">
<meta property="article:modified_time" content="2020-11-07T00:58:17.000Z">
<meta property="article:author" content="玉丸子">
<meta property="article:tag" content="Java">
<meta property="article:tag" content="2017">
<meta property="article:tag" content="后端">
<meta property="article:tag" content="编码">
<meta name="twitter:card" content="summary">
<meta name="twitter:image" content="http://wx4.sinaimg.cn/large/63503acbly1fith5ayrgdj20n50ct0tj.jpg">


<link rel="canonical" href="https://suyuhuan.gitee.io/yuwanzi.io/2017/08/20/2017-08-20-Encode/">


<script class="page-configurations">
  // https://hexo.io/docs/variables.html
  CONFIG.page = {
    sidebar: "",
    isHome : false,
    isPost : true,
    lang   : 'zh'
  };
</script>
<title>编码的那点事儿 | 玉丸子 | Blog</title>
  




  <noscript>
  <style>
  /* Fallback rules: this sheet sits inside <noscript>, so it only applies when scripting is disabled. */
  body { margin-top: 2rem; }

  /* Force these blocks visible. Presumably the main stylesheet hides them under .use-motion
     until the motion scripts reveal them; confirm against main.css. */
  .use-motion .menu-item,
  .use-motion .sidebar,
  .use-motion .post-block,
  .use-motion .pagination,
  .use-motion .comments,
  .use-motion .post-header,
  .use-motion .post-body,
  .use-motion .collection-header {
    visibility: visible;
  }

  /* Reset opacity to its initial value for the header, the brand toggle and the footer. */
  .use-motion .header,
  .use-motion .site-brand-container .toggle,
  .use-motion .footer { opacity: initial; }

  /* Reset both opacity and the top offset for the site title, subtitle and custom logo. */
  .use-motion .site-title,
  .use-motion .site-subtitle,
  .use-motion .custom-logo-image {
    opacity: initial;
    top: initial;
  }

  /* Draw the logo lines at full horizontal scale. */
  .use-motion .logo-line {
    transform: scaleX(1);
  }

  /* Hide the search overlay and the sidebar tab switcher; show every sidebar panel as a block. */
  .search-pop-overlay, .sidebar-nav { display: none; }
  .sidebar-panel { display: block; }
  </style>
</noscript>

<link rel="alternate" href="/yuwanzi.io/atom.xml" title="玉丸子 | Blog" type="application/atom+xml">
</head>

<body itemscope itemtype="http://schema.org/WebPage" class="use-motion">
  <div class="headband"></div>

  <main class="main">
    <header class="header" itemscope itemtype="http://schema.org/WPHeader">
      <div class="header-inner"><div class="site-brand-container">
  <div class="site-nav-toggle">
    <div class="toggle" aria-label="Navigationsleiste an/ausschalten" role="button">
    </div>
  </div>

  <div class="site-meta">

    <a href="/yuwanzi.io/" class="brand" rel="start">
      <i class="logo-line"></i>
      <h1 class="site-title">玉丸子 | Blog</h1>
      <i class="logo-line"></i>
    </a>
  </div>

  <div class="site-nav-right">
    <div class="toggle popup-trigger">
    </div>
  </div>
</div>







</div>
        
  
  <div class="toggle sidebar-toggle" role="button">
    <span class="toggle-line"></span>
    <span class="toggle-line"></span>
    <span class="toggle-line"></span>
  </div>

  <aside class="sidebar">

    <div class="sidebar-inner sidebar-nav-active sidebar-toc-active">
      <ul class="sidebar-nav">
        <li class="sidebar-nav-toc">
          Inhaltsverzeichnis
        </li>
        <li class="sidebar-nav-overview">
          Übersicht
        </li>
      </ul>

      <div class="sidebar-panel-container">
        <!--noindex-->
        <div class="post-toc-wrap sidebar-panel">
            <div class="post-toc animated"><ol class="nav"><li class="nav-item nav-level-3"><a class="nav-link" href="#%E4%BB%80%E4%B9%88%E6%98%AF%E7%BC%96%E7%A0%81"><span class="nav-number">1.</span> <span class="nav-text">什么是编码?</span></a></li><li class="nav-item nav-level-3"><a class="nav-link" href="#%E5%B8%B8%E8%A7%81%E7%9A%84%E5%AD%97%E7%AC%A6%E9%9B%86"><span class="nav-number">2.</span> <span class="nav-text">常见的字符集</span></a></li><li class="nav-item nav-level-3"><a class="nav-link" href="#Java%E4%B8%AD%E5%AD%97%E7%AC%A6%E7%9A%84%E7%BC%96%E8%A7%A3%E7%A0%81"><span class="nav-number">3.</span> <span class="nav-text">Java中字符的编解码</span></a><ol class="nav-child"><li class="nav-item nav-level-4"><a class="nav-link" href="#%E7%A4%BA%E4%BE%8B%E4%BB%A3%E7%A0%81"><span class="nav-number">3.1.</span> <span class="nav-text">示例代码</span></a></li></ol></li><li class="nav-item nav-level-3"><a class="nav-link" href="#IO%E4%B8%AD%E7%9A%84%E5%AD%97%E7%AC%A6%E7%BC%96%E7%A0%81"><span class="nav-number">4.</span> <span class="nav-text">IO中的字符编码</span></a></li><li class="nav-item nav-level-3"><a class="nav-link" href="#Web%E4%B8%AD%E7%9A%84%E5%AD%97%E7%AC%A6%E7%BC%96%E7%A0%81"><span class="nav-number">5.</span> <span class="nav-text">Web中的字符编码</span></a></li><li class="nav-item nav-level-3"><a class="nav-link" href="#%E4%B8%BA%E4%BB%80%E4%B9%88Char%E5%9C%A8Java%E4%B8%AD%E5%8D%A0%E7%94%A8%E4%B8%A4%E4%B8%AA%E5%AD%97%E8%8A%82"><span class="nav-number">6.</span> <span class="nav-text">为什么Char在Java中占用两个字节?</span></a></li><li class="nav-item nav-level-3"><a class="nav-link" href="#%E5%8F%82%E8%80%83%E6%96%87%E7%8C%AE"><span class="nav-number">7.</span> <span class="nav-text">参考文献</span></a></li></ol></div>
        </div>
        <!--/noindex-->

        <div class="site-overview-wrap sidebar-panel">
          <div class="site-author site-overview-item animated" itemprop="author" itemscope itemtype="http://schema.org/Person">
  <p class="site-author-name" itemprop="name">玉丸子</p>
  <div class="site-description" itemprop="description">这里是玉丸子的个人博客,与你一起发现更大的世界。</div>
</div>
<div class="site-state-wrap site-overview-item animated">
  <nav class="site-state">
      <div class="site-state-item site-state-posts">
          <a href="/yuwanzi.io/archives">
          <span class="site-state-item-count">68</span>
          <span class="site-state-item-name">Artikel</span>
        </a>
      </div>
      <div class="site-state-item site-state-categories">
            <a href="/yuwanzi.io/categories/">
        <span class="site-state-item-count">39</span>
        <span class="site-state-item-name">Kategorien</span></a>
      </div>
      <div class="site-state-item site-state-tags">
            <a href="/yuwanzi.io/tags/">
        <span class="site-state-item-count">46</span>
        <span class="site-state-item-name">schlagwörter</span></a>
      </div>
  </nav>
</div>



        </div>
      </div>
    </div>
  </aside>
  <div class="sidebar-dimmer"></div>


    </header>

    
  <div class="back-to-top" role="button">
    <i class="fa fa-arrow-up"></i>
    <span>0%</span>
  </div>

<noscript>
  <div class="noscript-warning">Theme NexT works best with JavaScript enabled</div>
</noscript>


    <div class="main-inner post posts-expand">


  


<div class="post-block">
  
  

  <article itemscope itemtype="http://schema.org/Article" class="post-content" lang="zh">
    <link itemprop="mainEntityOfPage" href="https://suyuhuan.gitee.io/yuwanzi.io/2017/08/20/2017-08-20-Encode/">

    <span hidden itemprop="author" itemscope itemtype="http://schema.org/Person">
      <meta itemprop="image" content="/yuwanzi.io/images/avatar.gif">
      <meta itemprop="name" content="玉丸子">
      <meta itemprop="description" content="这里是玉丸子的个人博客,与你一起发现更大的世界。">
    </span>

    <span hidden itemprop="publisher" itemscope itemtype="http://schema.org/Organization">
      <meta itemprop="name" content="玉丸子 | Blog">
    </span>
      <header class="post-header">
        <h1 class="post-title" itemprop="name headline">
          编码的那点事儿
        </h1>

        <div class="post-meta-container">
          <div class="post-meta">
    <span class="post-meta-item">
      <span class="post-meta-item-icon">
        <i class="far fa-calendar"></i>
      </span>
      <span class="post-meta-item-text">Veröffentlicht am</span>

      <time title="Erstellt: 2017-08-20 12:00:00" itemprop="dateCreated datePublished" datetime="2017-08-20T12:00:00+08:00">2017-08-20</time>
    </span>
      <span class="post-meta-item">
        <span class="post-meta-item-icon">
          <i class="far fa-calendar-check"></i>
        </span>
        <span class="post-meta-item-text">Bearbeitet am</span>
        <time title="Geändert am: 2020-11-07 08:58:17" itemprop="dateModified" datetime="2020-11-07T08:58:17+08:00">2020-11-07</time>
      </span>
    <span class="post-meta-item">
      <span class="post-meta-item-icon">
        <i class="far fa-folder"></i>
      </span>
      <span class="post-meta-item-text">in</span>
        <span itemprop="about" itemscope itemtype="http://schema.org/Thing">
          <a href="/yuwanzi.io/categories/%E5%90%8E%E7%AB%AF/" itemprop="url" rel="index"><span itemprop="name">后端</span></a>
        </span>
          . 
        <span itemprop="about" itemscope itemtype="http://schema.org/Thing">
          <a href="/yuwanzi.io/categories/%E5%90%8E%E7%AB%AF/Java/" itemprop="url" rel="index"><span itemprop="name">Java</span></a>
        </span>
    </span>

  
</div>

        </div>
      </header>

    
    
    
    <div class="post-body" itemprop="articleBody">
        <h3 id="什么是编码"><a href="#什么是编码" class="headerlink" title="什么是编码?"></a>什么是编码?</h3><hr>
<p>对于普通人来说,编码总是与一些秘密的东西相关联(加密与解密);对于程序员们来说,编码大多数是指一种用来在机器与人之间传递信息的方式.</p>
<p>但从广义上来讲,<strong>编码是从一种信息格式转换为另一种信息格式的过程,解码则是编码的逆向过程</strong>.接下来举几个使用到编码的例子: </p>
<ul>
<li><p>当我们要把想表达的意思通过一种语言表达出来,其实就是在脑海中对信息进行了一次编码,而对方如果也懂得这门语言,那么就可以用这门语言的解码方法(语法规则)来获得信息(日常的说话交流其实就是在编码与解码).</p>
</li>
<li><p>程序员写程序时,其实就是在将自己的想法通过计算机语言进行编码,而编译器则通过生成抽象语法树,词义分析等操作进行解码,最终交给计算机执行程序(编译器产生的解码结果并不是最终结果,一般为汇编语言,但汇编语言只是CPU指令集的助记符,还需要再进行解码).</p>
</li>
</ul>
<ul>
<li>计算机只有两种状态(0和1),要想存储和传输多媒体信息,就需要用到编码和解码.</li>
</ul>
<ul>
<li>对数据进行压缩,其本质就是以减少自身占用的空间为前提进行重新编码.</li>
</ul>
<p>了解了编码的含义,我们接下来重点探究<code>Java</code>中的字符编码.</p>
<blockquote>
<p>本文作者为: <a target="_blank" rel="noopener" href="https://github.com/SylvanasSun/">SylvanasSun</a>.转载请务必将下面这段话置于文章开头处(保留超链接).<br>本文首发自<a target="_blank" rel="noopener" href="https://sylvanassun.github.io/">SylvanasSun Blog</a>,原文链接: <a target="_blank" rel="noopener" href="https://sylvanassun.github.io/2017/08/20/2017-08-20-Encode/">https://sylvanassun.github.io/2017/08/20/2017-08-20-Encode/</a></p>
</blockquote>
<h3 id="常见的字符集"><a href="#常见的字符集" class="headerlink" title="常见的字符集"></a>常见的字符集</h3><hr>
<p><strong>字符集就是字符与二进制的映射表</strong>,每一个字符集都有自己的编码规则,每个字符所占用的字节也不同(支持的字符越多每个字符占用的字节也就越多).</p>
<ul>
<li><p>ASCII : 美国信息交换标准码(American Standard Code for Information Interchange).学过计算机的都知道大名鼎鼎的<code>ASCII</code>码,它是基于拉丁字母的字符集,总共记有128个字符,主要目的是显示英语.其中每个字符占用一个字节(只用到了低7位).</p>
<p> <img src="http://wx4.sinaimg.cn/large/63503acbly1fith5ayrgdj20n50ct0tj.jpg" alt="ASCII字符集示意图"></p>
</li>
</ul>
<ul>
<li><p>ISO-8859-1 : 它是由国际标准化组织(International Organization for Standardization)在<code>ASCII</code>基础上制定的8位字符集(仍然是单字节编码).它在<code>ASCII</code>空置的<code>0xA0-0xFF</code>范围内加入了96个字母与符号,支持了欧洲部分国家的语言.</p>
<p> <img src="http://wx1.sinaimg.cn/large/63503acbly1fith5be3u3j20o80bcgmk.jpg" alt="ISO-8859-1字符集示意图"></p>
</li>
</ul>
<ul>
<li><p>GBK : 如果我们想要让电脑上显示汉字就必须要有支持汉字的字符集,GBK就是这样一个支持汉字的字符集,全称为&lt;&lt;汉字内码扩展规范&gt;&gt;,它的编码方式分为单字节与双字节: <code>00–7F</code>范围内是单字节,与<code>ASCII</code>保持一致;在双字节编码中,第一字节的范围在<code>81–FE</code>(不包含<code>80</code>和<code>FF</code>),第二字节的一部分在<code>40–7E</code>,其他部分在<code>80–FE</code>.(这里不再介绍<code>GB2312</code>与<code>GB18030</code>,它们都是互相兼容的.)</p>
<p> <img src="http://wx1.sinaimg.cn/large/63503acbly1fith5bqcprj20r00bcjsi.jpg" alt="GBK编码示意图"></p>
</li>
<li><p>UTF-16 : <code>UTF-16</code>是<code>Unicode(统一码,一种以支持世界上多国语言为目的的通用字符集)</code>的一种实现方式,它把<code>Unicode</code>的抽象码位<strong>映射为<code>2</code>个或<code>4</code>个字节来表示</strong>,<strong><code>UTF-16</code>是变长编码(<code>UTF-32是真正的定长编码</code>)</strong>,但在最初,<code>UTF-16</code>是用来配合<code>UCS-2(UTF-16的子集,它是定长编码,用2个字节表示所有Unicode字符)</code>使用的,主要原因是当时<code>Unicode</code>只有不到65536个字符,2个字节就足以应对一切了.后来,<code>Unicode</code>支持的字符不断膨胀,2个字节已经不够用了,导致一些只支持<code>UCS-2</code>当做内码的产品很尴尬(<code>Java</code>就是其中之一).</p>
<p> <img src="http://wx4.sinaimg.cn/large/63503acbly1fith5c3ib8j20qg0ck3zq.jpg" alt="UTF-16编码示意图"></p>
</li>
<li><p>UTF-8 : <strong><code>UTF-8</code>也是基于<code>Unicode</code>的变长编码表</strong>,它使用<code>1~6</code>个字节来为每个字符进行编码(<code>RFC 3629</code>对<code>UTF-8</code>进行了重新规范,只能使用原来<code>Unicode</code>定义的区域,<code>U+0000~U+10FFFF</code>,也就是说最多只有4个字节),<code>UTF-8</code>完全兼容<code>ASCII</code>,它的编码规则如下:</p>
<ul>
<li><p>在<code>U+0000~U+007F</code>范围内,只需要一个字节(也就是<code>ASCII</code>字符集中的字符).</p>
</li>
<li><p>在<code>U+0080~U+07FF</code>范围内,需要两个字节(希腊文、阿拉伯文、希伯来文等).</p>
</li>
<li><p>在<code>U+0800~U+FFFF</code>范围内,需要三个字节(亚洲汉字等).</p>
</li>
<li><p>其他的字符使用四个字节.</p>
</li>
</ul>
</li>
</ul>
<p><img src="http://wx2.sinaimg.cn/large/63503acbly1fith5cmmpbj20w008ot9i.jpg" alt="UTF-8编码示意图"></p>
<h3 id="Java中字符的编解码"><a href="#Java中字符的编解码" class="headerlink" title="Java中字符的编解码"></a>Java中字符的编解码</h3><hr>
<p><code>Java</code>提供了<code>Charset</code>类来完成对字符的编码与解码,主要使用以下函数: </p>
<ul>
<li><code>public static Charset forName(String charsetName)</code> : 这是一个静态工厂函数,它根据传入的字符集名称来返回对应字符集的<code>Charset</code>类.</li>
</ul>
<ul>
<li><code>public final ByteBuffer encode(CharBuffer cb) / public final ByteBuffer encode(String str)</code> : 编码函数,它将传入的字符串或者字符序列进行编码,返回的<code>ByteBuffer</code>是一个字节缓冲区.</li>
</ul>
<ul>
<li><code>public final CharBuffer decode(ByteBuffer bb)</code> : 解码函数,将传入的字节序列解码为字符序列.</li>
</ul>
<h4 id="示例代码"><a href="#示例代码" class="headerlink" title="示例代码"></a>示例代码</h4><hr>
<figure class="highlight java"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br><span class="line">24</span><br><span class="line">25</span><br><span class="line">26</span><br><span class="line">27</span><br><span class="line">28</span><br><span class="line">29</span><br><span class="line">30</span><br><span class="line">31</span><br><span class="line">32</span><br><span class="line">33</span><br><span class="line">34</span><br><span class="line">35</span><br><span class="line">36</span><br><span class="line">37</span><br><span class="line">38</span><br></pre></td><td class="code"><pre><span class="line"><span class="keyword">private</span> <span class="keyword">static</span> <span class="keyword">final</span> String text = <span class="string">&quot;Hello,编码!&quot;</span>;</span><br><span class="line"></span><br><span class="line"><span class="keyword">private</span> <span class="keyword">static</span> <span class="keyword">final</span> Charset ASCII = Charset.forName(<span class="string">&quot;ASCII&quot;</span>);</span><br><span class="line"></span><br><span class="line"><span class="keyword">private</span> <span class="keyword">static</span> <span class="keyword">final</span> Charset ISO_8859_1 = Charset.forName(<span 
class="string">&quot;ISO-8859-1&quot;</span>);</span><br><span class="line"></span><br><span class="line"><span class="keyword">private</span> <span class="keyword">static</span> <span class="keyword">final</span> Charset GBK = Charset.forName(<span class="string">&quot;GBK&quot;</span>);</span><br><span class="line"></span><br><span class="line"><span class="keyword">private</span> <span class="keyword">static</span> <span class="keyword">final</span> Charset UTF_16 = Charset.forName(<span class="string">&quot;UTF-16&quot;</span>);</span><br><span class="line"></span><br><span class="line"><span class="keyword">private</span> <span class="keyword">static</span> <span class="keyword">final</span> Charset UTF_8 = Charset.forName(<span class="string">&quot;UTF-8&quot;</span>);</span><br><span class="line"></span><br><span class="line"><span class="function"><span class="keyword">private</span> <span class="keyword">static</span> <span class="keyword">void</span> <span class="title">encodeAndPrint</span><span class="params">(Charset charset)</span> </span>&#123;</span><br><span class="line">	System.out.println(charset.name() + <span class="string">&quot;: &quot;</span>);</span><br><span class="line">	printHex(text.toCharArray(), charset);</span><br><span class="line">	System.out.println(<span class="string">&quot;----------------------------------&quot;</span>);</span><br><span class="line">&#125;</span><br><span class="line"></span><br><span class="line"><span class="function"><span class="keyword">private</span> <span class="keyword">static</span> <span class="keyword">void</span> <span class="title">printHex</span><span class="params">(<span class="keyword">char</span>[] chars, Charset charset)</span> </span>&#123;</span><br><span class="line">	System.out.println(<span class="string">&quot;ForEach: &quot;</span>);</span><br><span class="line">	ByteBuffer byteBuffer;</span><br><span class="line">	<span class="keyword">byte</span>[] bytes;</span><br><span 
class="line">	<span class="keyword">if</span> (chars != <span class="keyword">null</span>) &#123;</span><br><span class="line">		<span class="keyword">for</span> (<span class="keyword">char</span> c : chars) &#123;</span><br><span class="line">			System.out.print(<span class="string">&quot;char: &quot;</span> + Integer.toHexString(c) + <span class="string">&quot; &quot;</span>);</span><br><span class="line">			<span class="comment">// 打印出字符编码后对应的字节</span></span><br><span class="line">			byteBuffer = charset.encode(String.valueOf(c));</span><br><span class="line">			bytes = byteBuffer.array();</span><br><span class="line">			System.out.print(<span class="string">&quot;byte: &quot;</span>);</span><br><span class="line">			<span class="keyword">if</span> (bytes != <span class="keyword">null</span>) &#123;</span><br><span class="line">				<span class="keyword">for</span> (<span class="keyword">byte</span> b : bytes)</span><br><span class="line">					System.out.print(Integer.toHexString(b &amp; <span class="number">0xFF</span>) + <span class="string">&quot; &quot;</span>);</span><br><span class="line">			&#125;</span><br><span class="line">			System.out.println();</span><br><span class="line">		&#125;</span><br><span class="line">	&#125;</span><br><span class="line">	System.out.println();</span><br><span class="line">&#125;</span><br></pre></td></tr></table></figure>
<p>有的读者可能会对以上代码中的<code>b &amp; 0xFF</code>产生疑惑,这是为了解决符号扩展问题.在<code>Java</code>中,<strong>如果一个窄类型强转为一个宽类型时,会对多出来的空位进行符号扩展(如果符号位为1,就补1,为0则补0)</strong>.只有<code>char</code>类型除外,<code>char</code>是没有符号位的,所以它永远都是补0.</p>
<p>代码中调用了函数<code>Integer.toHexString()</code>,变量<code>b</code>在运算之前就已经被强转为了<code>int</code>类型,为了让数值不受到破坏,我们让<code>b</code>对<code>0xFF</code>进行了与运算,<code>0xFF</code>是一个低八位都为1的值(其他位都为0),而<code>byte</code>的有效范围只在低八位,所以结果为高24位(包括符号位)都变为了0,低八位保留了原有的值.</p>
<p>如果不做这项操作,那么<code>b</code>又恰好是个负数的话,那这个强转后的<code>int</code>的前24位都会变为1,这个结果显然已经破坏了原有的值.</p>
<h3 id="IO中的字符编码"><a href="#IO中的字符编码" class="headerlink" title="IO中的字符编码"></a>IO中的字符编码</h3><hr>
<p><code>Reader</code>与<code>Writer</code>是<code>Java</code>中负责字符输入与输出的抽象基类,它们的子类实现了在各种场景中的字符输入输出功能.</p>
<p>在使用<code>Reader</code>与<code>Writer</code>进行<code>IO</code>操作时,需要指定字符集,如果不显式指定的话会默认使用当前环境的字符集,但我还是推荐显式指定<strong>一致的字符集</strong>,这样才不会出现乱码问题(<code>Reader</code>与<code>Writer</code>指定的字符集不一致或更改了环境导致字符集不一致等).</p>
<figure class="highlight java"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br><span class="line">24</span><br><span class="line">25</span><br><span class="line">26</span><br><span class="line">27</span><br><span class="line">28</span><br><span class="line">29</span><br><span class="line">30</span><br><span class="line">31</span><br><span class="line">32</span><br><span class="line">33</span><br><span class="line">34</span><br><span class="line">35</span><br><span class="line">36</span><br><span class="line">37</span><br><span class="line">38</span><br><span class="line">39</span><br><span class="line">40</span><br><span class="line">41</span><br><span class="line">42</span><br><span class="line">43</span><br><span class="line">44</span><br></pre></td><td class="code"><pre><span class="line"><span class="function"><span class="keyword">public</span> <span class="keyword">static</span> <span class="keyword">void</span> <span class="title">writeChar</span><span class="params">(String content, String filename, String charset)</span> </span>&#123;</span><br><span class="line">	OutputStreamWriter writer = <span class="keyword">null</span>;</span><br><span class="line"></span><br><span class="line">	<span class="keyword">try</span> &#123;</span><br><span class="line">		
FileOutputStream outputStream = <span class="keyword">new</span> FileOutputStream(filename);</span><br><span class="line">		writer = <span class="keyword">new</span> OutputStreamWriter(outputStream, charset);</span><br><span class="line">		writer.write(content);</span><br><span class="line">	&#125; <span class="keyword">catch</span> (IOException e) &#123;</span><br><span class="line">		e.printStackTrace();</span><br><span class="line">	&#125; <span class="keyword">finally</span> &#123;</span><br><span class="line">		<span class="keyword">try</span> &#123;</span><br><span class="line">			<span class="keyword">if</span> (writer != <span class="keyword">null</span>)</span><br><span class="line">				writer.close();</span><br><span class="line">		&#125; <span class="keyword">catch</span> (IOException e) &#123;</span><br><span class="line">			e.printStackTrace();</span><br><span class="line">		&#125;</span><br><span class="line">	&#125;</span><br><span class="line">&#125;</span><br><span class="line"></span><br><span class="line"><span class="function"><span class="keyword">public</span> <span class="keyword">static</span> String <span class="title">readChar</span><span class="params">(String filename, String charset)</span> </span>&#123;</span><br><span class="line">	InputStreamReader reader = <span class="keyword">null</span>;</span><br><span class="line">	StringBuilder sb = <span class="keyword">null</span>;</span><br><span class="line"></span><br><span class="line">	<span class="keyword">try</span> &#123;</span><br><span class="line">		FileInputStream inputStream = <span class="keyword">new</span> FileInputStream(filename);</span><br><span class="line">		reader = <span class="keyword">new</span> InputStreamReader(inputStream, charset);</span><br><span class="line">		<span class="keyword">char</span>[] buf = <span class="keyword">new</span> <span class="keyword">char</span>[<span class="number">64</span>];</span><br><span class="line">		<span 
class="keyword">int</span> count = <span class="number">0</span>;</span><br><span class="line">		sb = <span class="keyword">new</span> StringBuilder();</span><br><span class="line">		<span class="keyword">while</span> ((count = reader.read(buf)) != -<span class="number">1</span>)</span><br><span class="line">			sb.append(buf, <span class="number">0</span>, count);</span><br><span class="line">	&#125; <span class="keyword">catch</span> (IOException e) &#123;</span><br><span class="line">		e.printStackTrace();</span><br><span class="line">	&#125; <span class="keyword">finally</span> &#123;</span><br><span class="line">		<span class="keyword">try</span> &#123;</span><br><span class="line">			<span class="keyword">if</span> (reader != <span class="keyword">null</span>)</span><br><span class="line">				reader.close();</span><br><span class="line">		&#125; <span class="keyword">catch</span> (IOException e) &#123;</span><br><span class="line">			e.printStackTrace();</span><br><span class="line">		&#125;</span><br><span class="line">	&#125;</span><br><span class="line"></span><br><span class="line">	<span class="keyword">return</span> sb.toString();</span><br><span class="line">&#125;</span><br></pre></td></tr></table></figure>

<h3 id="Web中的字符编码"><a href="#Web中的字符编码" class="headerlink" title="Web中的字符编码"></a>Web中的字符编码</h3><hr>
<p>在<code>Web</code>开发中,乱码也是经常存在的一个问题,主要体现在请求的参数和返回的响应结果,最头疼的是不同的浏览器的默认编码甚至还不一致.</p>
<p><code>Java</code>以<code>Http</code>的请求与响应抽象出了<code>Request</code>和<code>Response</code>两个对象,只要保持<strong>请求与响应的编码一致</strong>就能避免乱码问题.</p>
<p><code>Request</code>提供了<code>setCharacterEncoding(String encode)</code>函数来改变请求体的编码,一般通过写一个过滤器来统一对所有请求设置编码.</p>
<figure class="highlight java"><table><tr><td class="gutter"><pre><span class="line">1</span><br></pre></td><td class="code"><pre><span class="line">request.setCharacterEncoding(<span class="string">&quot;UTF-8&quot;</span>);</span><br></pre></td></tr></table></figure>
<p><code>Response</code>提供了<code>setCharacterEncoding(String encode)</code>与<code>setHeader(String name,String value)</code>两个函数,它们都可以设置响应的编码.</p>
<figure class="highlight java"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br></pre></td><td class="code"><pre><span class="line">response.setCharacterEncoding(<span class="string">&quot;UTF-8&quot;</span>);</span><br><span class="line"><span class="comment">// 设置响应头的编码信息,同时也告知了浏览器该如何解码</span></span><br><span class="line">response.setHeader(<span class="string">&quot;Content-Type&quot;</span>,<span class="string">&quot;text/html;charset=UTF-8&quot;</span>); </span><br></pre></td></tr></table></figure>
<p>还有一种更简便的方式,直接使用<code>Spring</code>提供的<code>CharacterEncodingFilter</code>,该过滤器就是用来统一编码的.</p>
<figure class="highlight xml"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br></pre></td><td class="code"><pre><span class="line"><span class="tag">&lt;<span class="name">filter</span>&gt;</span></span><br><span class="line">    <span class="tag">&lt;<span class="name">filter-name</span>&gt;</span>charsetFilter<span class="tag">&lt;/<span class="name">filter-name</span>&gt;</span></span><br><span class="line">    <span class="tag">&lt;<span class="name">filter-class</span>&gt;</span>org.springframework.web.filter.CharacterEncodingFilter<span class="tag">&lt;/<span class="name">filter-class</span>&gt;</span></span><br><span class="line">    <span class="tag">&lt;<span class="name">init-param</span>&gt;</span></span><br><span class="line">        <span class="tag">&lt;<span class="name">param-name</span>&gt;</span>encoding<span class="tag">&lt;/<span class="name">param-name</span>&gt;</span></span><br><span class="line">        <span class="tag">&lt;<span class="name">param-value</span>&gt;</span>UTF-8<span class="tag">&lt;/<span class="name">param-value</span>&gt;</span></span><br><span class="line">    <span class="tag">&lt;/<span class="name">init-param</span>&gt;</span></span><br><span class="line">    <span class="tag">&lt;<span class="name">init-param</span>&gt;</span></span><br><span class="line">        <span class="tag">&lt;<span class="name">param-name</span>&gt;</span>forceEncoding<span class="tag">&lt;/<span class="name">param-name</span>&gt;</span></span><br><span 
class="line">        <span class="tag">&lt;<span class="name">param-value</span>&gt;</span>true<span class="tag">&lt;/<span class="name">param-value</span>&gt;</span></span><br><span class="line">    <span class="tag">&lt;/<span class="name">init-param</span>&gt;</span></span><br><span class="line"><span class="tag">&lt;/<span class="name">filter</span>&gt;</span></span><br><span class="line"><span class="tag">&lt;<span class="name">filter-mapping</span>&gt;</span></span><br><span class="line">   <span class="tag">&lt;<span class="name">filter-name</span>&gt;</span>charsetFilter<span class="tag">&lt;/<span class="name">filter-name</span>&gt;</span></span><br><span class="line">   <span class="tag">&lt;<span class="name">url-pattern</span>&gt;</span>*<span class="tag">&lt;/<span class="name">url-pattern</span>&gt;</span></span><br><span class="line"><span class="tag">&lt;/<span class="name">filter-mapping</span>&gt;</span></span><br></pre></td></tr></table></figure>
<p><code>CharacterEncodingFilter</code>的实现如下: </p>
<figure class="highlight java"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br><span class="line">6</span><br><span class="line">7</span><br><span class="line">8</span><br><span class="line">9</span><br><span class="line">10</span><br><span class="line">11</span><br><span class="line">12</span><br><span class="line">13</span><br><span class="line">14</span><br><span class="line">15</span><br><span class="line">16</span><br><span class="line">17</span><br><span class="line">18</span><br><span class="line">19</span><br><span class="line">20</span><br><span class="line">21</span><br><span class="line">22</span><br><span class="line">23</span><br><span class="line">24</span><br><span class="line">25</span><br><span class="line">26</span><br></pre></td><td class="code"><pre><span class="line"><span class="keyword">public</span> <span class="class"><span class="keyword">class</span> <span class="title">CharacterEncodingFilter</span> <span class="keyword">extends</span> <span class="title">OncePerRequestFilter</span> </span>&#123;</span><br><span class="line">    <span class="keyword">private</span> String encoding;</span><br><span class="line">    <span class="keyword">private</span> <span class="keyword">boolean</span> forceEncoding = <span class="keyword">false</span>;</span><br><span class="line"></span><br><span class="line">    <span class="function"><span class="keyword">public</span> <span class="title">CharacterEncodingFilter</span><span class="params">()</span> </span>&#123;</span><br><span class="line">    &#125;</span><br><span class="line"></span><br><span class="line">    <span class="function"><span class="keyword">public</span> <span class="keyword">void</span> <span class="title">setEncoding</span><span class="params">(String encoding)</span> </span>&#123;</span><br><span class="line">        <span 
class="keyword">this</span>.encoding = encoding;</span><br><span class="line">    &#125;</span><br><span class="line"></span><br><span class="line">    <span class="function"><span class="keyword">public</span> <span class="keyword">void</span> <span class="title">setForceEncoding</span><span class="params">(<span class="keyword">boolean</span> forceEncoding)</span> </span>&#123;</span><br><span class="line">        <span class="keyword">this</span>.forceEncoding = forceEncoding;</span><br><span class="line">    &#125;</span><br><span class="line"></span><br><span class="line">    <span class="function"><span class="keyword">protected</span> <span class="keyword">void</span> <span class="title">doFilterInternal</span><span class="params">(HttpServletRequest request, HttpServletResponse response, FilterChain filterChain)</span> <span class="keyword">throws</span> ServletException, IOException </span>&#123;</span><br><span class="line">        <span class="keyword">if</span>(<span class="keyword">this</span>.encoding != <span class="keyword">null</span> &amp;&amp; (<span class="keyword">this</span>.forceEncoding || request.getCharacterEncoding() == <span class="keyword">null</span>)) &#123;</span><br><span class="line">            request.setCharacterEncoding(<span class="keyword">this</span>.encoding);</span><br><span class="line">            <span class="keyword">if</span>(<span class="keyword">this</span>.forceEncoding) &#123;</span><br><span class="line">                response.setCharacterEncoding(<span class="keyword">this</span>.encoding);</span><br><span class="line">            &#125;</span><br><span class="line">        &#125;</span><br><span class="line"></span><br><span class="line">        filterChain.doFilter(request, response);</span><br><span class="line">    &#125;</span><br><span class="line">&#125;</span><br></pre></td></tr></table></figure>

<h3 id="为什么Char在Java中占用两个字节"><a href="#为什么Char在Java中占用两个字节" class="headerlink" title="为什么Char在Java中占用两个字节?"></a>为什么Char在Java中占用两个字节?</h3><hr>
<p>众所周知,在<code>Java</code>中一个<code>char</code>类型占用两个字节,那么这是为什么呢?这是因为<code>Java</code>使用了<code>UTF-16</code>当作内码.</p>
<p><strong>内码(<code>Internal Encoding</code>)就是程序内部所使用的编码</strong>,主要在于编程语言实现其<code>char</code>和<code>String</code>类型在内存中使用的内部编码.与之相对的就是<strong>外码(<code>External Encoding</code>),它是程序与外部交互时使用的字符编码</strong>.</p>
<p>值得一提的是,当初<code>UTF-16</code>是配合<code>UCS-2</code>使用的,后来<code>Unicode</code>支持的字符不断增多,<code>UTF-16</code>也不再只当作一个定长的2字节编码使用了,也就是说,<strong><code>Java</code>中的一个<code>char</code>其实并不一定能代表一个完整的<code>UTF-16</code>字符.</strong>超出基本多文种平面(<code>BMP</code>)的字符需要由两个<code>char</code>组成的代理对(<code>Surrogate Pair</code>)来表示.</p>
<p><code>String.getBytes()</code>可以将该String的内码转换为指定的外码并返回这个编完码的字节数组(无参数版使用当前平台的默认编码).</p>
<figure class="highlight java"><table><tr><td class="gutter"><pre><span class="line">1</span><br><span class="line">2</span><br><span class="line">3</span><br><span class="line">4</span><br><span class="line">5</span><br></pre></td><td class="code"><pre><span class="line"><span class="function"><span class="keyword">public</span> <span class="keyword">static</span> <span class="keyword">void</span> <span class="title">main</span><span class="params">(String[] args)</span> <span class="keyword">throws</span> UnsupportedEncodingException </span>&#123;</span><br><span class="line">	String text = <span class="string">&quot;码&quot;</span>;</span><br><span class="line">	<span class="keyword">byte</span>[] bytes = text.getBytes(<span class="string">&quot;UTF-8&quot;</span>); </span><br><span class="line">	System.out.println(bytes.length); <span class="comment">// 输出3</span></span><br><span class="line">&#125;</span><br></pre></td></tr></table></figure>
<p><code>Java</code>还规定<code>char</code>与<code>String</code>类型的序列化是使用<code>UTF-8</code>当作外码的,<code>Java</code>中的<code>Class</code>文件中的字符串常量与符号名也都规定使用<code>UTF-8</code>.这种设计是为了平衡运行时的时间效率与外部存储的空间效率所做的取舍.</p>
<p>在<code>SUN JDK6</code>中,有一个参数<code>-XX:+UseCompressedStrings</code>.该参数可以让<code>String</code>内部存储字符内容可能用<code>byte[]</code>也可能用<code>char[]</code>: 当整个字符串所有字符处于<code>ASCII</code>字符集范围内时,就使用<code>byte[]</code>(使用了<code>ASCII</code>编码)来存储,如果有任一字符超过了<code>ASCII</code>的范围,就退回到使用<code>char[]</code>(<code>UTF-16</code>编码)来存储.但是这个功能实现得并不理想,所以没有包含在<code>Open JDK6</code>/<code>Open JDK7</code>/<code>Oracle JDK7</code>等后续版本中.</p>
<p><code>JavaScript</code>也使用了<code>UTF-16</code>作为内码,其实现也广泛应用了<code>CompressedString</code>的思想,主流的<code>JavaScript</code>引擎中都会尽可能使用<code>ASCII</code>内码的字符串,不过这些细节都是对外隐藏的.</p>
<h3 id="参考文献"><a href="#参考文献" class="headerlink" title="参考文献"></a>参考文献</h3><hr>
<ul>
<li><p><a target="_blank" rel="noopener" href="https://en.wikipedia.org/wiki/ASCII">ASCII - Wikipedia</a></p>
</li>
<li><p><a target="_blank" rel="noopener" href="https://en.wikipedia.org/wiki/ISO/IEC_8859-1">ISO/IEC 8859-1 - Wikipedia</a></p>
</li>
</ul>
<ul>
<li><a target="_blank" rel="noopener" href="https://en.wikipedia.org/wiki/GBK">GBK - Wikipedia</a></li>
</ul>
<ul>
<li><a target="_blank" rel="noopener" href="https://en.wikipedia.org/wiki/UTF-16">UTF-16 - Wikipedia</a></li>
</ul>
<ul>
<li><a target="_blank" rel="noopener" href="https://en.wikipedia.org/wiki/UTF-8">UTF-8 - Wikipedia</a></li>
</ul>
<ul>
<li><a target="_blank" rel="noopener" href="https://www.zhihu.com/question/27562173/answer/37188642">Java 语言中一个字符占几个字节？ - RednaxelaFX的回答</a></li>
</ul>

    </div>

    
    
    

    <footer class="post-footer">
          <div class="post-tags">
              <a href="/yuwanzi.io/tags/Java/" rel="tag"># Java</a>
              <a href="/yuwanzi.io/tags/2017/" rel="tag"># 2017</a>
              <a href="/yuwanzi.io/tags/%E5%90%8E%E7%AB%AF/" rel="tag"># 后端</a>
              <a href="/yuwanzi.io/tags/%E7%BC%96%E7%A0%81/" rel="tag"># 编码</a>
          </div>

        

          <div class="post-nav">
            <div class="post-nav-item">
                <a href="/yuwanzi.io/2017/08/13/2017-08-13-BTrees/" rel="prev" title="B树的那点事儿">
                  <i class="fa fa-chevron-left"></i> B树的那点事儿
                </a>
            </div>
            <div class="post-nav-item">
                <a href="/yuwanzi.io/2017/09/08/2017-09-08-ComputerStructure/" rel="next" title="探索计算机的结构与核心概念">
                  探索计算机的结构与核心概念 <i class="fa fa-chevron-right"></i>
                </a>
            </div>
          </div>
    </footer>
  </article>
</div>







<script>
  // Re-select the reader's preferred comment tab when the `tabs:register` event fires.
  window.addEventListener('tabs:register', () => {
    // The generated CONFIG.comments object stores the default tab under the
    // `active` key (it has no `activeClass` key), so read that key. It may be null.
    let { active } = CONFIG.comments;
    if (CONFIG.comments.storage) {
      // A previously saved choice takes precedence over the configured default.
      active = localStorage.getItem('comments_active') || active;
    }
    if (active) {
      const activeTab = document.querySelector(`a[href="#comment-${active}"]`);
      if (activeTab) {
        activeTab.click();
      }
    }
  });
  if (CONFIG.comments.storage) {
    // Persist the chosen comment tab so the handler above can restore it later.
    window.addEventListener('tabs:click', event => {
      // Only react to panes that belong to the comment tab group.
      if (!event.target.matches('.tabs-comment .tab-content .tab-pane')) return;
      // The pane's second class is what gets saved; the register handler uses
      // the saved value to build the `#comment-<value>` link selector.
      const commentClass = event.target.classList[1];
      localStorage.setItem('comments_active', commentClass);
    });
  }
</script>
</div>
  </main>

  <footer class="footer">
    <div class="footer-inner">


<div class="copyright">
  &copy; 
  <span itemprop="copyrightYear">2021</span>
  <span class="with-love">
    <i class="fa fa-heart"></i>
  </span>
  <span class="author" itemprop="copyrightHolder">玉丸子</span>
</div>
  <div class="powered-by">Erstellt mit  <a href="https://hexo.io/" class="theme-link" rel="noopener" target="_blank">Hexo</a> &amp; <a href="https://theme-next.js.org/muse/" class="theme-link" rel="noopener" target="_blank">NexT.Muse</a>
  </div>

    </div>
  </footer>

  
  <script src="//cdn.jsdelivr.net/npm/animejs@3.2.1/lib/anime.min.js"></script>
<script src="/yuwanzi.io/js/utils.js"></script><script src="/yuwanzi.io/js/motion.js"></script><script src="/yuwanzi.io/js/schemes/muse.js"></script><script src="/yuwanzi.io/js/next-boot.js"></script>

  






  





</body>
</html>
